import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *
from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, RegressionTestPreset
from evidently.tests import *
import warnings

# Suppress all warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Equivalent of the notebook magic `%load_ext kedro.ipython`, written as a
# plain call so this cell is valid Python syntax.
# NOTE(review): `catalog` is never assigned in this file; presumably the
# kedro.ipython extension injects it into the session -- confirm.
get_ipython().run_line_magic("load_ext", "kedro.ipython")
catalog

# Load the raw dataset registered under this catalog entry and preview it.
dataset = catalog.load("dataset_id_742")
dataset.head()
# Recorded output:
# [09/25/24 19:12:02] INFO Loading data from dataset_id_742 (MlflowCSVDataset)... data_catalog.py:539
| | checking_status | duration | credit_history | purpose | credit_amount | savings_status | employment | installment_commitment | personal_status | other_parties | ... | X_3 | X_4 | X_5 | X_6 | X_7 | X_8 | X_9 | X_10 | X_11 | y |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0<=X<200 | 30 | all paid | furniture/equipment | 3496.0 | >=1000 | 1<=X<4 | 4 | male single | none | ... | 0.449680 | 4.055515 | 0.001283 | 0.191197 | 0.449680 | 0.305551 | 0.750874 | 0.765611 | 0.623008 | False |
| 1 | no checking | 42 | existing paid | radio/tv | 7166.0 | no known savings | 4<=X<7 | 2 | male mar/wid | none | ... | 0.623008 | 4.962082 | 0.396208 | 0.525043 | 0.378336 | 0.887402 | 0.342156 | 0.938944 | 0.266451 | True |
| 2 | no checking | 18 | existing paid | furniture/equipment | 1984.0 | <100 | 1<=X<4 | 4 | male single | none | ... | 0.887402 | 4.421564 | 0.818652 | 0.508065 | 0.898812 | 0.255605 | 0.319191 | 0.535321 | 0.444986 | True |
| 3 | no checking | 48 | critical/other existing credit | radio/tv | 3578.0 | no known savings | >=7 | 4 | male single | none | ... | 0.818652 | 6.080646 | 0.445092 | 0.974743 | 0.433125 | 0.009822 | 0.782929 | 0.231668 | 0.204295 | True |
| 4 | 0<=X<200 | 30 | existing paid | furniture/equipment | 3441.0 | 100<=X<500 | 1<=X<4 | 2 | female div/dep/mar | co applicant | ... | 0.319191 | 6.353212 | 0.078052 | 0.104358 | 0.348942 | 0.513697 | 0.839659 | 0.362052 | 0.639795 | False |
5 rows × 33 columns
# Load the dataset registered as "preprocessed_dataset" and preview it.
df = catalog.load("preprocessed_dataset")
df.head()
# Recorded output:
# [09/25/24 19:14:56] INFO Loading data from preprocessed_dataset (MlflowParquetDataset)... data_catalog.py:539
| | duration | credit_amount | installment_commitment | residence_since | age | existing_credits | num_dependents | X_1 | X_2 | X_3 | ... | job_high qualif/self emp/mgmt | job_skilled | job_unemp/unskilled non res | job_unskilled resident | own_telephone_none | own_telephone_yes | foreign_worker_no | foreign_worker_yes | health_status_bad | health_status_good |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.616420 | -0.058349 | 0.874126 | -0.751108 | -0.295901 | -0.692308 | 2.064742 | -1.703735 | -1.129340 | -0.302667 | ... | False | True | False | False | False | True | False | True | False | True |
| 1 | 1.585887 | 1.013586 | -0.985716 | 1.058791 | -0.723504 | -0.692308 | -0.484322 | 0.753875 | 0.840695 | 0.260109 | ... | False | True | False | False | False | True | False | True | False | True |
| 2 | -0.353048 | -0.499975 | 0.874126 | 1.058791 | 0.815866 | 1.230769 | -0.484322 | 0.013467 | -0.487520 | 1.118564 | ... | False | True | False | False | True | False | False | True | False | True |
| 3 | 2.070621 | -0.034398 | 0.874126 | -1.656058 | 0.815866 | -0.692308 | -0.484322 | 1.370483 | -0.871247 | 0.895340 | ... | False | True | False | False | False | True | False | True | False | True |
| 4 | 0.616420 | -0.074413 | -0.985716 | 1.058791 | -1.407669 | -0.692308 | -0.484322 | 1.238906 | -0.908444 | -0.726348 | ... | False | True | False | False | True | False | False | True | True | False |
5 rows × 73 columns
# --- Data stability test suite -------------------------------------------
# The first 40 rows of `dataset` are passed as the "current" batch and the
# remaining rows as the reference.
data_stability = TestSuite(tests=[
    DataStabilityTestPreset(),
])
data_stability.run(
    current_data=dataset.iloc[:40],
    reference_data=dataset.iloc[40:],
    column_mapping=None,
)
data_stability  # bare expression: displays the result when run in a notebook

# --- Data drift report (split at row 60) ----------------------------------
data_drift_report = Report(metrics=[
    DataDriftPreset(),
])
data_drift_report.run(
    current_data=dataset.iloc[:60],
    reference_data=dataset.iloc[60:],
    column_mapping=None,
)
data_drift_report

# --- Data stability test suite (re-run) -----------------------------------
# NOTE(review): this cell repeats the first stability run with identical
# arguments; kept so the sequence of executed cells is unchanged.
data_stability = TestSuite(tests=[
    DataStabilityTestPreset(),
])
data_stability.run(
    current_data=dataset.iloc[:40],
    reference_data=dataset.iloc[40:],
    column_mapping=None,
)
data_stability

# --- Data drift report (split at row 40) ----------------------------------
# Same report as above, but split at row 40 instead of row 60, matching the
# split used by the stability suite.
data_drift_report = Report(metrics=[
    DataDriftPreset(),
])
data_drift_report.run(
    current_data=dataset.iloc[:40],
    reference_data=dataset.iloc[40:],
    column_mapping=None,
)
data_drift_report